/*-------------------------------------------------------------------------
 *
 * jsonb_gin.c
 *     GIN support functions for jsonb
 *
 * Copyright (c) 2014-2017, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *      src/backend/utils/adt/jsonb_gin.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gin.h"
#include "access/hash.h"
#include "access/stratnum.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/jsonb.h"
#include "utils/varlena.h"

typedef struct PathHashStack
{
    uint32        hash;
    struct PathHashStack *parent;
} PathHashStack;

static Datum make_text_key(char flag, const char *str, int len);
static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);

/*
 *
 * jsonb_ops GIN opclass support functions
 *
 */

Datum
gin_compare_jsonb(PG_FUNCTION_ARGS)
{
    text       *arg1 = PG_GETARG_TEXT_PP(0);
    text       *arg2 = PG_GETARG_TEXT_PP(1);
    int32        result;
    char       *a1p,
               *a2p;
    int            len1,
                len2;

    a1p = VARDATA_ANY(arg1);
    a2p = VARDATA_ANY(arg2);

    len1 = VARSIZE_ANY_EXHDR(arg1);
    len2 = VARSIZE_ANY_EXHDR(arg2);

    /* Compare text as bttextcmp does, but always using C collation */
    result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID);

    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);

    PG_RETURN_INT32(result);
}

Datum
gin_extract_jsonb(PG_FUNCTION_ARGS)
{
    Jsonb       *jb = (Jsonb *) PG_GETARG_JSONB(0);
    int32       *nentries = (int32 *) PG_GETARG_POINTER(1);
    int            total = 2 * JB_ROOT_COUNT(jb);
    JsonbIterator *it;
    JsonbValue    v;
    JsonbIteratorToken r;
    int            i = 0;
    Datum       *entries;

    /* If the root level is empty, we certainly have no keys */
    if (total == 0)
    {
        *nentries = 0;
        PG_RETURN_POINTER(NULL);
    }

    /* Otherwise, use 2 * root count as initial estimate of result size */
    entries = (Datum *) palloc(sizeof(Datum) * total);

    it = JsonbIteratorInit(&jb->root);

    while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
    {
        /* Since we recurse into the object, we might need more space */
        if (i >= total)
        {
            total *= 2;
            entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
        }

        switch (r)
        {
            case WJB_KEY:
                entries[i++] = make_scalar_key(&v, true);
                break;
            case WJB_ELEM:
                /* Pretend string array elements are keys, see jsonb.h */
                entries[i++] = make_scalar_key(&v, (v.type == jbvString));
                break;
            case WJB_VALUE:
                entries[i++] = make_scalar_key(&v, false);
                break;
            default:
                /* we can ignore structural items */
                break;
        }
    }

    *nentries = i;

    PG_RETURN_POINTER(entries);
}

Datum
gin_extract_jsonb_query(PG_FUNCTION_ARGS)
{// #lizard forgives
    int32       *nentries = (int32 *) PG_GETARG_POINTER(1);
    StrategyNumber strategy = PG_GETARG_UINT16(2);
    int32       *searchMode = (int32 *) PG_GETARG_POINTER(6);
    Datum       *entries;

    if (strategy == JsonbContainsStrategyNumber)
    {
        /* Query is a jsonb, so just apply gin_extract_jsonb... */
        entries = (Datum *)
            DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
                                                PG_GETARG_DATUM(0),
                                                PointerGetDatum(nentries)));
        /* ...although "contains {}" requires a full index scan */
        if (*nentries == 0)
            *searchMode = GIN_SEARCH_MODE_ALL;
    }
    else if (strategy == JsonbExistsStrategyNumber)
    {
        /* Query is a text string, which we treat as a key */
        text       *query = PG_GETARG_TEXT_PP(0);

        *nentries = 1;
        entries = (Datum *) palloc(sizeof(Datum));
        entries[0] = make_text_key(JGINFLAG_KEY,
                                   VARDATA_ANY(query),
                                   VARSIZE_ANY_EXHDR(query));
    }
    else if (strategy == JsonbExistsAnyStrategyNumber ||
             strategy == JsonbExistsAllStrategyNumber)
    {
        /* Query is a text array; each element is treated as a key */
        ArrayType  *query = PG_GETARG_ARRAYTYPE_P(0);
        Datum       *key_datums;
        bool       *key_nulls;
        int            key_count;
        int            i,
                    j;

        deconstruct_array(query,
                          TEXTOID, -1, false, 'i',
                          &key_datums, &key_nulls, &key_count);

        entries = (Datum *) palloc(sizeof(Datum) * key_count);

        for (i = 0, j = 0; i < key_count; i++)
        {
            /* Nulls in the array are ignored */
            if (key_nulls[i])
                continue;
            entries[j++] = make_text_key(JGINFLAG_KEY,
                                         VARDATA(key_datums[i]),
                                         VARSIZE(key_datums[i]) - VARHDRSZ);
        }

        *nentries = j;
        /* ExistsAll with no keys should match everything */
        if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
            *searchMode = GIN_SEARCH_MODE_ALL;
    }
    else
    {
        elog(ERROR, "unrecognized strategy number: %d", strategy);
        entries = NULL;            /* keep compiler quiet */
    }

    PG_RETURN_POINTER(entries);
}

Datum
gin_consistent_jsonb(PG_FUNCTION_ARGS)
{// #lizard forgives
    bool       *check = (bool *) PG_GETARG_POINTER(0);
    StrategyNumber strategy = PG_GETARG_UINT16(1);

    /* Jsonb       *query = PG_GETARG_JSONB(2); */
    int32        nkeys = PG_GETARG_INT32(3);

    /* Pointer       *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
    bool       *recheck = (bool *) PG_GETARG_POINTER(5);
    bool        res = true;
    int32        i;

    if (strategy == JsonbContainsStrategyNumber)
    {
        /*
         * We must always recheck, since we can't tell from the index whether
         * the positions of the matched items match the structure of the query
         * object.  (Even if we could, we'd also have to worry about hashed
         * keys and the index's failure to distinguish keys from string array
         * elements.)  However, the tuple certainly doesn't match unless it
         * contains all the query keys.
         */
        *recheck = true;
        for (i = 0; i < nkeys; i++)
        {
            if (!check[i])
            {
                res = false;
                break;
            }
        }
    }
    else if (strategy == JsonbExistsStrategyNumber)
    {
        /*
         * Although the key is certainly present in the index, we must recheck
         * because (1) the key might be hashed, and (2) the index match might
         * be for a key that's not at top level of the JSON object.  For (1),
         * we could look at the query key to see if it's hashed and not
         * recheck if not, but the index lacks enough info to tell about (2).
         */
        *recheck = true;
        res = true;
    }
    else if (strategy == JsonbExistsAnyStrategyNumber)
    {
        /* As for plain exists, we must recheck */
        *recheck = true;
        res = true;
    }
    else if (strategy == JsonbExistsAllStrategyNumber)
    {
        /* As for plain exists, we must recheck */
        *recheck = true;
        /* ... but unless all the keys are present, we can say "false" */
        for (i = 0; i < nkeys; i++)
        {
            if (!check[i])
            {
                res = false;
                break;
            }
        }
    }
    else
        elog(ERROR, "unrecognized strategy number: %d", strategy);

    PG_RETURN_BOOL(res);
}

Datum
gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
{// #lizard forgives
    GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
    StrategyNumber strategy = PG_GETARG_UINT16(1);

    /* Jsonb       *query = PG_GETARG_JSONB(2); */
    int32        nkeys = PG_GETARG_INT32(3);

    /* Pointer       *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
    GinTernaryValue res = GIN_MAYBE;
    int32        i;

    /*
     * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
     * corresponds to always forcing recheck in the regular consistent
     * function, for the reasons listed there.
     */
    if (strategy == JsonbContainsStrategyNumber ||
        strategy == JsonbExistsAllStrategyNumber)
    {
        /* All extracted keys must be present */
        for (i = 0; i < nkeys; i++)
        {
            if (check[i] == GIN_FALSE)
            {
                res = GIN_FALSE;
                break;
            }
        }
    }
    else if (strategy == JsonbExistsStrategyNumber ||
             strategy == JsonbExistsAnyStrategyNumber)
    {
        /* At least one extracted key must be present */
        res = GIN_FALSE;
        for (i = 0; i < nkeys; i++)
        {
            if (check[i] == GIN_TRUE ||
                check[i] == GIN_MAYBE)
            {
                res = GIN_MAYBE;
                break;
            }
        }
    }
    else
        elog(ERROR, "unrecognized strategy number: %d", strategy);

    PG_RETURN_GIN_TERNARY_VALUE(res);
}

/*
 *
 * jsonb_path_ops GIN opclass support functions
 *
 * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
 * value; but the JSON key(s) leading to each value are also included in its
 * hash computation.  This means we can only support containment queries,
 * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
 * since different hashes will be generated.
 *
 */

Datum
gin_extract_jsonb_path(PG_FUNCTION_ARGS)
{// #lizard forgives
    Jsonb       *jb = PG_GETARG_JSONB(0);
    int32       *nentries = (int32 *) PG_GETARG_POINTER(1);
    int            total = 2 * JB_ROOT_COUNT(jb);
    JsonbIterator *it;
    JsonbValue    v;
    JsonbIteratorToken r;
    PathHashStack tail;
    PathHashStack *stack;
    int            i = 0;
    Datum       *entries;

    /* If the root level is empty, we certainly have no keys */
    if (total == 0)
    {
        *nentries = 0;
        PG_RETURN_POINTER(NULL);
    }

    /* Otherwise, use 2 * root count as initial estimate of result size */
    entries = (Datum *) palloc(sizeof(Datum) * total);

    /* We keep a stack of partial hashes corresponding to parent key levels */
    tail.parent = NULL;
    tail.hash = 0;
    stack = &tail;

    it = JsonbIteratorInit(&jb->root);

    while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
    {
        PathHashStack *parent;

        /* Since we recurse into the object, we might need more space */
        if (i >= total)
        {
            total *= 2;
            entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
        }

        switch (r)
        {
            case WJB_BEGIN_ARRAY:
            case WJB_BEGIN_OBJECT:
                /* Push a stack level for this object */
                parent = stack;
                stack = (PathHashStack *) palloc(sizeof(PathHashStack));

                /*
                 * We pass forward hashes from outer nesting levels so that
                 * the hashes for nested values will include outer keys as
                 * well as their own keys.
                 *
                 * Nesting an array within another array will not alter
                 * innermost scalar element hash values, but that seems
                 * inconsequential.
                 */
                stack->hash = parent->hash;
                stack->parent = parent;
                break;
            case WJB_KEY:
                /* mix this key into the current outer hash */
                JsonbHashScalarValue(&v, &stack->hash);
                /* hash is now ready to incorporate the value */
                break;
            case WJB_ELEM:
            case WJB_VALUE:
                /* mix the element or value's hash into the prepared hash */
                JsonbHashScalarValue(&v, &stack->hash);
                /* and emit an index entry */
                entries[i++] = UInt32GetDatum(stack->hash);
                /* reset hash for next key, value, or sub-object */
                stack->hash = stack->parent->hash;
                break;
            case WJB_END_ARRAY:
            case WJB_END_OBJECT:
                /* Pop the stack */
                parent = stack->parent;
                pfree(stack);
                stack = parent;
                /* reset hash for next key, value, or sub-object */
                if (stack->parent)
                    stack->hash = stack->parent->hash;
                else
                    stack->hash = 0;
                break;
            default:
                elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
        }
    }

    *nentries = i;

    PG_RETURN_POINTER(entries);
}

Datum
gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
{
    int32       *nentries = (int32 *) PG_GETARG_POINTER(1);
    StrategyNumber strategy = PG_GETARG_UINT16(2);
    int32       *searchMode = (int32 *) PG_GETARG_POINTER(6);
    Datum       *entries;

    if (strategy != JsonbContainsStrategyNumber)
        elog(ERROR, "unrecognized strategy number: %d", strategy);

    /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
    entries = (Datum *)
        DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
                                            PG_GETARG_DATUM(0),
                                            PointerGetDatum(nentries)));

    /* ... although "contains {}" requires a full index scan */
    if (*nentries == 0)
        *searchMode = GIN_SEARCH_MODE_ALL;

    PG_RETURN_POINTER(entries);
}

Datum
gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
{
    bool       *check = (bool *) PG_GETARG_POINTER(0);
    StrategyNumber strategy = PG_GETARG_UINT16(1);

    /* Jsonb       *query = PG_GETARG_JSONB(2); */
    int32        nkeys = PG_GETARG_INT32(3);

    /* Pointer       *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
    bool       *recheck = (bool *) PG_GETARG_POINTER(5);
    bool        res = true;
    int32        i;

    if (strategy != JsonbContainsStrategyNumber)
        elog(ERROR, "unrecognized strategy number: %d", strategy);

    /*
     * jsonb_path_ops is necessarily lossy, not only because of hash
     * collisions but also because it doesn't preserve complete information
     * about the structure of the JSON object.  Besides, there are some
     * special rules around the containment of raw scalars in arrays that are
     * not handled here.  So we must always recheck a match.  However, if not
     * all of the keys are present, the tuple certainly doesn't match.
     */
    *recheck = true;
    for (i = 0; i < nkeys; i++)
    {
        if (!check[i])
        {
            res = false;
            break;
        }
    }

    PG_RETURN_BOOL(res);
}

Datum
gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
{
    GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
    StrategyNumber strategy = PG_GETARG_UINT16(1);

    /* Jsonb       *query = PG_GETARG_JSONB(2); */
    int32        nkeys = PG_GETARG_INT32(3);

    /* Pointer       *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
    GinTernaryValue res = GIN_MAYBE;
    int32        i;

    if (strategy != JsonbContainsStrategyNumber)
        elog(ERROR, "unrecognized strategy number: %d", strategy);

    /*
     * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
     * corresponds to always forcing recheck in the regular consistent
     * function, for the reasons listed there.
     */
    for (i = 0; i < nkeys; i++)
    {
        if (check[i] == GIN_FALSE)
        {
            res = GIN_FALSE;
            break;
        }
    }

    PG_RETURN_GIN_TERNARY_VALUE(res);
}

/*
 * Construct a jsonb_ops GIN key from a flag byte and a textual representation
 * (which need not be null-terminated).  This function is responsible
 * for hashing overlength text representations; it will add the
 * JGINFLAG_HASHED bit to the flag value if it does that.
 */
static Datum
make_text_key(char flag, const char *str, int len)
{
    text       *item;
    char        hashbuf[10];

    if (len > JGIN_MAXLENGTH)
    {
        uint32        hashval;

        hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
        snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
        str = hashbuf;
        len = 8;
        flag |= JGINFLAG_HASHED;
    }

    /*
     * Now build the text Datum.  For simplicity we build a 4-byte-header
     * varlena text Datum here, but we expect it will get converted to short
     * header format when stored in the index.
     */
    item = (text *) palloc(VARHDRSZ + len + 1);
    SET_VARSIZE(item, VARHDRSZ + len + 1);

    *VARDATA(item) = flag;

    memcpy(VARDATA(item) + 1, str, len);

    return PointerGetDatum(item);
}

/*
 * Create a textual representation of a JsonbValue that will serve as a GIN
 * key in a jsonb_ops index.  is_key is true if the JsonbValue is a key,
 * or if it is a string array element (since we pretend those are keys,
 * see jsonb.h).
 */
static Datum
make_scalar_key(const JsonbValue *scalarVal, bool is_key)
{
    Datum        item;
    char       *cstr;

    switch (scalarVal->type)
    {
        case jbvNull:
            Assert(!is_key);
            item = make_text_key(JGINFLAG_NULL, "", 0);
            break;
        case jbvBool:
            Assert(!is_key);
            item = make_text_key(JGINFLAG_BOOL,
                                 scalarVal->val.boolean ? "t" : "f", 1);
            break;
        case jbvNumeric:
            Assert(!is_key);

            /*
             * A normalized textual representation, free of trailing zeroes,
             * is required so that numerically equal values will produce equal
             * strings.
             *
             * It isn't ideal that numerics are stored in a relatively bulky
             * textual format.  However, it's a notationally convenient way of
             * storing a "union" type in the GIN B-Tree, and indexing Jsonb
             * strings takes precedence.
             */
            cstr = numeric_normalize(scalarVal->val.numeric);
            item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
            pfree(cstr);
            break;
        case jbvString:
            item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
                                 scalarVal->val.string.val,
                                 scalarVal->val.string.len);
            break;
        default:
            elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
            item = 0;            /* keep compiler quiet */
            break;
    }

    return item;
}
